library(tidyverse)
library(GGally)   # ggplot extensions
library(caret)
library(magrittr)
library(skimr)
library(janitor)  # for data cleaning purposes
library(glmnet)   # the main package for penalized linear models
library(broom)    # for tidying regression coefficient outputs
library(knitr)
library(kableExtra)  # for nicer tables in rmarkdown

theme_set(theme_bw())   # globally set ggplot theme

Disclaimer: this lab borrows lots of ideas and examples from this great tutorial.

Data: Ames Housing dataset

We are going to predict house sale prices in Ames, Iowa. See here.

library(AmesHousing)
housing_data <- make_ames()
housing_data <- clean_names(housing_data) # from janitor: standardizes variable names (separators and case)
housing_data <- housing_data %>% mutate(log_sale_price = log(sale_price))
skim(housing_data)

Look at the outcome variable, the price of houses.

ggplot(data = housing_data, aes(x = sale_price)) + geom_density()

ggplot(data = housing_data, aes(x = log_sale_price)) + geom_density()

Let’s see correlations of features and the outcome variable. Based on this, simple benchmark models can quickly be put together.

A baseline model

term             estimate  std.error  statistic  p.value
(Intercept)        11.318      0.023    498.266        0
gr_liv_area         0.001      0.000     38.602        0
tot_rms_abv_grd    -0.052      0.006     -9.006        0

Weird pattern: the sale price decreases with the number of rooms? This is spurious: it is caused by the high positive correlation between the two features. Univariate regressions have the intuitive signs:

term             estimate  std.error  statistic  p.value
(Intercept)        11.180      0.017    660.123        0
gr_liv_area         0.001      0.000     52.430        0

term             estimate  std.error  statistic  p.value
(Intercept)        11.199      0.028    405.172        0
tot_rms_abv_grd     0.128      0.004     30.627        0

Penalized methods offer a solution to these kinds of patterns.

Set up training and test (holdout) datasets

set.seed(1234)
training_ratio <- 0.7
train_indices <- createDataPartition(
  y = housing_data[["log_sale_price"]],
  times = 1,
  p = training_ratio,
  list = FALSE
) %>% as.vector()
data_train <- housing_data[train_indices, ]
data_test <- housing_data[-train_indices, ]

fit_control <- trainControl(method = "cv", number = 10)

Penalize large coefficients: the Ridge regression

The ridge regression adds a penalty term to the sum of squared residuals: the sum of squares of the regression coefficients. This puts a cost on having large coefficients. Result: a biased but lower-variance model.
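
In symbols (a standard textbook formulation, added here for reference): with outcome $y_i$, features $x_{ij}$, and penalty weight $\lambda \ge 0$, ridge solves

$$
\hat{\beta}^{\text{ridge}} = \arg\min_{\beta} \; \sum_{i=1}^{n} \Big( y_i - \beta_0 - \sum_{j=1}^{p} x_{ij}\beta_j \Big)^2 + \lambda \sum_{j=1}^{p} \beta_j^2.
$$

The larger $\lambda$ is, the stronger the shrinkage of the coefficients towards zero (the intercept is not penalized).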

features <- setdiff(names(housing_data), c("sale_price", "log_sale_price"))
features
 [1] "ms_sub_class"       "ms_zoning"          "lot_frontage"       "lot_area"          
 [5] "street"             "alley"              "lot_shape"          "land_contour"      
 [9] "utilities"          "lot_config"         "land_slope"         "neighborhood"      
[13] "condition_1"        "condition_2"        "bldg_type"          "house_style"       
[17] "overall_qual"       "overall_cond"       "year_built"         "year_remod_add"    
[21] "roof_style"         "roof_matl"          "exterior_1st"       "exterior_2nd"      
[25] "mas_vnr_type"       "mas_vnr_area"       "exter_qual"         "exter_cond"        
[29] "foundation"         "bsmt_qual"          "bsmt_cond"          "bsmt_exposure"     
[33] "bsmt_fin_type_1"    "bsmt_fin_sf_1"      "bsmt_fin_type_2"    "bsmt_fin_sf_2"     
[37] "bsmt_unf_sf"        "total_bsmt_sf"      "heating"            "heating_qc"        
[41] "central_air"        "electrical"         "first_flr_sf"       "second_flr_sf"     
[45] "low_qual_fin_sf"    "gr_liv_area"        "bsmt_full_bath"     "bsmt_half_bath"    
[49] "full_bath"          "half_bath"          "bedroom_abv_gr"     "kitchen_abv_gr"    
[53] "kitchen_qual"       "tot_rms_abv_grd"    "functional"         "fireplaces"        
[57] "fireplace_qu"       "garage_type"        "garage_finish"      "garage_cars"       
[61] "garage_area"        "garage_qual"        "garage_cond"        "paved_drive"       
[65] "wood_deck_sf"       "open_porch_sf"      "enclosed_porch"     "three_season_porch"
[69] "screen_porch"       "pool_area"          "pool_qc"            "fence"             
[73] "misc_feature"       "misc_val"           "mo_sold"            "year_sold"         
[77] "sale_type"          "sale_condition"     "longitude"          "latitude"          

First we are going to directly work with the glmnet package to estimate penalized models. Then we look at how this can be implemented through caret.

# glmnet needs inputs as a matrix; model.matrix handles factor variables
# -1: drop the intercept column, as glmnet automatically includes its own
x_train <- model.matrix( ~ . -1, data_train[, features])
dim(x_train)
[1] 2053  309
# standardization of variables is automatically done by glmnet

# how much penalty do we want to apply? select with CV
# (a grid like this could be passed to glmnet via `lambda = lambda_grid`;
#  left unsupplied, glmnet constructs its own lambda path)
lambda_grid <- 10^seq(2, -5, length.out = 100)

set.seed(1234)
ridge_model <- glmnet(
  x = x_train, y = data_train[["log_sale_price"]], 
  family = "gaussian", # for continuous response
  alpha = 0  # the ridge model
)

plot(ridge_model, xvar = "lambda")

Look at some individual coefficients.

# helper function to extract the coefficient sequence as a tidy tibble
# (tidy() on sparse matrices is deprecated in broom, so convert explicitly)
get_glmnet_coeff_sequence <- function(glmnet_model) {
  coeff_sequence <- coef(glmnet_model) %>%
    as.matrix() %>%
    as_tibble(rownames = "variable") %>%
    pivot_longer(-variable, names_to = "lambda_id", values_to = "value")

  lambdas <- tibble(
    lambda = glmnet_model$lambda,
    lambda_id = paste0("s", 0:(length(glmnet_model$lambda) - 1))
  )

  dplyr::inner_join(coeff_sequence, lambdas, by = "lambda_id")
}
ridge_coeffs <- get_glmnet_coeff_sequence(ridge_model)
selected_variables <- c("gr_liv_area", "tot_rms_abv_grd", "garage_area",  "kitchen_abv_gr")
ggplot(
  data = ridge_coeffs %>% filter(variable %in% selected_variables),
  aes(x = log(lambda), y = value)) +
    geom_line() +
  facet_wrap(~ variable, scales = "free_y", ncol = 1)

We can use cross-validation to determine the optimal penalty term weight. Two lambda values are marked on the plot: one yields the minimal CV RMSE, the other belongs to the simplest model (highest lambda) whose CV error is within one standard error of the optimum. That is, the latter gives the simplest model that is still “good enough”.

set.seed(1234)
ridge_model_cv <- cv.glmnet(
  x = x_train, y = data_train[["log_sale_price"]], 
  family = "gaussian",
  alpha = 0,
  nfolds = 10
)

best_lambda <- ridge_model_cv$lambda.min
message(paste0("The optimally chosen penalty parameter: ", best_lambda))
The optimally chosen penalty parameter: 0.180129135344916
highest_good_enough_lambda <- ridge_model_cv$lambda.1se
message(paste0("The highest good enough penalty parameter: ", highest_good_enough_lambda))
The highest good enough penalty parameter: 0.875896216853839

We can also use caret to estimate ridge models. This lets us compare it later to any other model estimated with caret, using, for example, cross-validation with exactly the same folds.

# ridge model
ridge_tune_grid <- expand.grid(
  "alpha" = c(0),
  "lambda" = seq(0.05, 0.5, by = 0.025)
)

set.seed(857)
ridge_fit <- train(
  log_sale_price ~ . -sale_price,
  data = data_train,
  method = "glmnet",
  preProcess = c("center", "scale"),
  tuneGrid = ridge_tune_grid,
  trControl = fit_control
)
ridge_fit
glmnet 

2053 samples
  81 predictor

Pre-processing: centered (308), scaled (308) 
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 1847, 1849, 1849, 1847, 1848, 1847, ... 
Resampling results across tuning parameters:

  lambda  RMSE       Rsquared   MAE       
  0.050   0.1464742  0.8707003  0.08654948
  0.075   0.1446715  0.8732402  0.08641530
  0.100   0.1436810  0.8746908  0.08668066
  0.125   0.1431577  0.8755367  0.08716370
  0.150   0.1429352  0.8760033  0.08772921
  0.175   0.1429234  0.8762089  0.08830684
  0.200   0.1430586  0.8762391  0.08891297
  0.225   0.1433025  0.8761429  0.08951049
  0.250   0.1436295  0.8759524  0.09012103
  0.275   0.1440211  0.8756904  0.09074102
  0.300   0.1444622  0.8753753  0.09135751
  0.325   0.1449408  0.8750214  0.09196578
  0.350   0.1454497  0.8746372  0.09257827
  0.375   0.1459867  0.8742271  0.09318557
  0.400   0.1465406  0.8737999  0.09377238
  0.425   0.1471172  0.8733518  0.09436542
  0.450   0.1477083  0.8728943  0.09495540
  0.475   0.1483059  0.8724337  0.09554946
  0.500   0.1489224  0.8719555  0.09615197

Tuning parameter 'alpha' was held constant at a value of 0
RMSE was used to select the optimal model using the smallest value.
The final values used for the model were alpha = 0 and lambda = 0.175.

Another variant: LASSO regression

While Ridge applies a constraint on the sum of squares of coefficients, LASSO does the same for the sum of the absolute values of coefficients.

This seemingly small difference has important consequences: some coefficients are set exactly to zero, others are only shrunk towards zero.
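
In the same notation as above, the LASSO objective differs from ridge only in the penalty term:

$$
\hat{\beta}^{\text{lasso}} = \arg\min_{\beta} \; \sum_{i=1}^{n} \Big( y_i - \beta_0 - \sum_{j=1}^{p} x_{ij}\beta_j \Big)^2 + \lambda \sum_{j=1}^{p} |\beta_j|.
$$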

set.seed(1234)
lasso_model <- glmnet(
  x = x_train, y = data_train[["log_sale_price"]], 
  family = "gaussian",
  alpha = 1  # the lasso model
)

plot(lasso_model, xvar = "lambda")

Again, let’s look at individual coefficients. We can see that some are set exactly to zero for higher values of the penalty term. This is in contrast to what we saw with the Ridge model.

lasso_coeffs <- get_glmnet_coeff_sequence(lasso_model)
selected_variables <- c("gr_liv_area", "tot_rms_abv_grd", "garage_area", "kitchen_abv_gr")
ggplot(
  data = lasso_coeffs %>% filter(variable %in% selected_variables),
  aes(x = log(lambda), y = value)) +
    geom_line() +
  facet_wrap(~ variable, scales = "free_y", ncol = 1)

Again, we can apply cross-validation to determine the optimal value for the penalty term.

set.seed(1234)
lasso_model_cv <- cv.glmnet(
  x = x_train, y = data_train[["log_sale_price"]], 
  family = "gaussian",
  alpha = 1,
  nfolds = 10
)

best_lambda <- lasso_model_cv$lambda.min
message(paste0("The optimally chosen penalty parameter: ", best_lambda))
The optimally chosen penalty parameter: 0.00388076456762921
highest_good_enough_lambda <- lasso_model_cv$lambda.1se
message(paste0("The highest good enough penalty parameter: ", highest_good_enough_lambda))
The highest good enough penalty parameter: 0.0227297188251623
plot(lasso_model_cv)

Fitting LASSO models with caret is similar to fitting Ridge models.

tenpowers <- 10^seq(-1, -5, by = -1)

lasso_tune_grid <- expand.grid(
  "alpha" = c(1),
  "lambda" = c(tenpowers, tenpowers / 2) 
)

set.seed(857)
lasso_fit <- train(
  log_sale_price ~ . -sale_price,
  data = data_train,
  method = "glmnet",
  preProcess = c("center", "scale"),
  tuneGrid = lasso_tune_grid,
  trControl = fit_control
)
lasso_fit
glmnet 

2053 samples
  81 predictor

Pre-processing: centered (308), scaled (308) 
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 1847, 1849, 1849, 1847, 1848, 1847, ... 
Resampling results across tuning parameters:

  lambda  RMSE       Rsquared   MAE       
  5e-06   0.1724948  0.8316208  0.08760109
  1e-05   0.1724948  0.8316208  0.08760109
  5e-05   0.1719195  0.8325130  0.08753221
  1e-04   0.1701065  0.8353641  0.08724015
  5e-04   0.1608812  0.8493622  0.08610976
  1e-03   0.1549066  0.8580135  0.08566133
  5e-03   0.1481077  0.8664453  0.08711685
  1e-02   0.1534121  0.8580125  0.09303712
  5e-02   0.1970917  0.8002051  0.13039984
  1e-01   0.2394941  0.7692260  0.16621033

Tuning parameter 'alpha' was held constant at a value of 1
RMSE was used to select the optimal model using the smallest value.
The final values used for the model were alpha = 1 and lambda = 0.005.
ggplot(lasso_fit) + scale_x_log10()

Variable selection - why is it happening?

Source: Introduction to Statistical Learning

The constraints are different: LASSO's constraint region has corners, while Ridge's is smooth. The corners make some coefficients exactly zero.

Source: Introduction to Statistical Learning

Combine Ridge and LASSO: Elastic net

We can combine the two types of penalties. LASSO is attractive because it performs principled variable selection; however, among correlated features it typically keeps only one, quite arbitrarily, in the model. Ridge, in contrast, shrinks the coefficients of correlated features towards zero together. Applying penalties on both the absolute values and the squares of the coefficients retains both virtues. This method is called the elastic net.
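
For reference, glmnet parameterizes the combined penalty with the mixing parameter $\alpha$ that we tune below:

$$
\lambda \left( \frac{1-\alpha}{2} \sum_{j=1}^{p} \beta_j^2 + \alpha \sum_{j=1}^{p} |\beta_j| \right), \qquad \alpha \in [0, 1],
$$

so $\alpha = 0$ recovers ridge and $\alpha = 1$ recovers LASSO.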

enet_tune_grid <- expand.grid(
  "alpha" = seq(0, 1, by = 0.1),
  "lambda" = union(lasso_tune_grid[["lambda"]], ridge_tune_grid[["lambda"]])
)

set.seed(857)
enet_fit <- train(
  log_sale_price ~ . -sale_price,
  data = data_train,
  method = "glmnet",
  preProcess = c("center", "scale"),
  tuneGrid = enet_tune_grid,
  trControl = fit_control
)
enet_fit
glmnet 

2053 samples
  81 predictor

Pre-processing: centered (308), scaled (308) 
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 1847, 1849, 1849, 1847, 1848, 1847, ... 
Resampling results across tuning parameters:

  alpha  lambda    RMSE       Rsquared   MAE       
  0.0    0.000005  0.1494346  0.8665309  0.08716389
  0.0    0.000010  0.1494346  0.8665309  0.08716389
  0.0    0.000050  0.1494346  0.8665309  0.08716389
  0.0    0.000100  0.1494346  0.8665309  0.08716389
  0.0    0.000500  0.1494346  0.8665309  0.08716389
  0.0    0.001000  0.1494346  0.8665309  0.08716389
  0.0    0.005000  0.1494346  0.8665309  0.08716389
  0.0    0.010000  0.1494346  0.8665309  0.08716389
  0.0    0.050000  0.1464742  0.8707003  0.08654948
  0.0    0.075000  0.1446715  0.8732402  0.08641530
  0.0    0.100000  0.1436810  0.8746908  0.08668066
  0.0    0.125000  0.1431577  0.8755367  0.08716370
  0.0    0.150000  0.1429352  0.8760033  0.08772921
  0.0    0.175000  0.1429234  0.8762089  0.08830684
  0.0    0.200000  0.1430586  0.8762391  0.08891297
  0.0    0.225000  0.1433025  0.8761429  0.08951049
  0.0    0.250000  0.1436295  0.8759524  0.09012103
  0.0    0.275000  0.1440211  0.8756904  0.09074102
  0.0    0.300000  0.1444622  0.8753753  0.09135751
  0.0    0.325000  0.1449408  0.8750214  0.09196578
  0.0    0.350000  0.1454497  0.8746372  0.09257827
  0.0    0.375000  0.1459867  0.8742271  0.09318557
  0.0    0.400000  0.1465406  0.8737999  0.09377238
  0.0    0.425000  0.1471172  0.8733518  0.09436542
  0.0    0.450000  0.1477083  0.8728943  0.09495540
  0.0    0.475000  0.1483059  0.8724337  0.09554946
  0.0    0.500000  0.1489224  0.8719555  0.09615197
  0.1    0.000005  0.1670353  0.8403192  0.08758031
  0.1    0.000010  0.1670353  0.8403192  0.08758031
  0.1    0.000050  0.1670353  0.8403192  0.08758031
  0.1    0.000100  0.1670353  0.8403192  0.08758031
  0.1    0.000500  0.1652817  0.8430854  0.08759484
  0.1    0.001000  0.1626288  0.8471310  0.08753583
  0.1    0.005000  0.1537384  0.8599886  0.08648934
  0.1    0.010000  0.1497553  0.8655036  0.08577203
  0.1    0.050000  0.1449925  0.8724173  0.08752321
  0.1    0.075000  0.1472820  0.8700995  0.09078731
  0.1    0.100000  0.1510769  0.8657510  0.09442049
  0.1    0.125000  0.1552411  0.8610822  0.09824876
  0.1    0.150000  0.1591302  0.8570805  0.10182023
  0.1    0.175000  0.1628752  0.8535522  0.10509368
  0.1    0.200000  0.1667296  0.8500137  0.10831016
  0.1    0.225000  0.1705956  0.8465929  0.11144985
  0.1    0.250000  0.1744036  0.8434133  0.11449097
  0.1    0.275000  0.1782365  0.8402647  0.11748185
  0.1    0.300000  0.1821332  0.8370731  0.12043730
  0.1    0.325000  0.1859880  0.8340924  0.12340008
  0.1    0.350000  0.1898015  0.8312907  0.12637167
  0.1    0.375000  0.1935818  0.8286476  0.12930385
  0.1    0.400000  0.1973826  0.8260119  0.13228109
  0.1    0.425000  0.2011854  0.8233597  0.13526745
  0.1    0.450000  0.2049773  0.8207158  0.13827386
  0.1    0.475000  0.2087777  0.8180422  0.14127767
  0.1    0.500000  0.2124801  0.8156490  0.14421317
  0.2    0.000005  0.1686982  0.8376346  0.08746792
  0.2    0.000010  0.1686982  0.8376346  0.08746792
  0.2    0.000050  0.1686982  0.8376346  0.08746792
  0.2    0.000100  0.1686982  0.8376346  0.08746792
  0.2    0.000500  0.1645699  0.8442078  0.08732577
  0.2    0.001000  0.1612194  0.8492273  0.08703339
  0.2    0.005000  0.1516590  0.8626889  0.08593693
  0.2    0.010000  0.1468238  0.8694456  0.08539376
  0.2    0.050000  0.1514324  0.8632664  0.09349835
  0.2    0.075000  0.1582912  0.8549462  0.10037994
  0.2    0.100000  0.1647081  0.8479959  0.10625892
  0.2    0.125000  0.1712318  0.8413826  0.11168947
  0.2    0.150000  0.1780718  0.8344974  0.11702932
  0.2    0.175000  0.1847363  0.8282441  0.12218695
  0.2    0.200000  0.1912079  0.8226110  0.12726045
  0.2    0.225000  0.1977384  0.8169232  0.13226425
  0.2    0.250000  0.2041532  0.8116526  0.13724719
  0.2    0.275000  0.2103213  0.8073177  0.14212879
  0.2    0.300000  0.2164762  0.8031795  0.14704757
  0.2    0.325000  0.2226729  0.7989754  0.15202946
  0.2    0.350000  0.2289062  0.7946446  0.15706538
  0.2    0.375000  0.2349507  0.7908896  0.16208311
  0.2    0.400000  0.2406921  0.7880980  0.16690543
  0.2    0.425000  0.2463364  0.7857864  0.17172436
  0.2    0.450000  0.2519374  0.7837543  0.17655693
  0.2    0.475000  0.2575106  0.7818457  0.18143574
  0.2    0.500000  0.2631246  0.7798631  0.18640559
  0.3    0.000005  0.1694889  0.8363694  0.08742121
  0.3    0.000010  0.1694889  0.8363694  0.08742121
  0.3    0.000050  0.1694889  0.8363694  0.08742121
  0.3    0.000100  0.1693230  0.8366335  0.08741713
  0.3    0.000500  0.1640506  0.8449649  0.08710775
  0.3    0.001000  0.1601442  0.8507472  0.08670681
  0.3    0.005000  0.1497165  0.8652159  0.08569673
  0.3    0.010000  0.1461510  0.8701295  0.08560862
  0.3    0.050000  0.1582715  0.8536112  0.09981780
  0.3    0.075000  0.1671751  0.8435546  0.10803163
  0.3    0.100000  0.1766422  0.8333271  0.11561899
  0.3    0.125000  0.1857002  0.8244686  0.12269191
  0.3    0.150000  0.1950144  0.8152437  0.12978800
  0.3    0.175000  0.2039044  0.8073794  0.13669108
  0.3    0.200000  0.2125468  0.8005866  0.14354836
  0.3    0.225000  0.2210106  0.7943273  0.15026194
  0.3    0.250000  0.2289228  0.7900343  0.15675433
  0.3    0.275000  0.2367623  0.7866223  0.16347045
  0.3    0.300000  0.2446801  0.7833307  0.17036884
  0.3    0.325000  0.2525252  0.7806005  0.17735528
  0.3    0.350000  0.2603597  0.7782232  0.18438724
  0.3    0.375000  0.2682727  0.7757639  0.19159587
  0.3    0.400000  0.2762823  0.7729326  0.19892319
  0.3    0.425000  0.2842662  0.7698784  0.20626618
  0.3    0.450000  0.2922789  0.7664228  0.21360385
  0.3    0.475000  0.3001680  0.7629534  0.22076395
  0.3    0.500000  0.3078466  0.7601795  0.22767306
  0.4    0.000005  0.1704846  0.8348458  0.08741585
  0.4    0.000010  0.1704846  0.8348458  0.08741585
  0.4    0.000050  0.1704846  0.8348458  0.08741585
  0.4    0.000100  0.1696535  0.8361684  0.08736964
  0.4    0.000500  0.1636200  0.8455879  0.08689064
  0.4    0.001000  0.1595481  0.8515146  0.08646928
  0.4    0.005000  0.1479330  0.8676950  0.08557608
  0.4    0.010000  0.1467300  0.8689270  0.08627611
  0.4    0.050000  0.1637869  0.8463652  0.10479141
  0.4    0.075000  0.1757953  0.8326936  0.11464710
  0.4    0.100000  0.1876609  0.8200835  0.12378325
  0.4    0.125000  0.1993287  0.8083322  0.13276478
  0.4    0.150000  0.2103536  0.7987837  0.14146575
  0.4    0.175000  0.2205396  0.7919677  0.14968744
  0.4    0.200000  0.2304581  0.7871512  0.15808533
  0.4    0.225000  0.2405416  0.7826474  0.16696311
  0.4    0.250000  0.2505703  0.7791898  0.17594585
  0.4    0.275000  0.2608273  0.7755577  0.18529023
  0.4    0.300000  0.2711586  0.7717192  0.19474537
  0.4    0.325000  0.2815382  0.7674957  0.20427500
  0.4    0.350000  0.2917695  0.7639305  0.21357868
  0.4    0.375000  0.3019084  0.7607288  0.22272180
  0.4    0.400000  0.3119532  0.7578097  0.23163290
  0.4    0.425000  0.3219891  0.7534710  0.24046073
  0.4    0.450000  0.3320417  0.7471823  0.24926727
  0.4    0.475000  0.3420369  0.7378820  0.25797869
  0.4    0.500000  0.3517341  0.7247923  0.26634789
  0.5    0.000005  0.1708418  0.8342819  0.08737420
  0.5    0.000010  0.1708418  0.8342819  0.08737420
  0.5    0.000050  0.1708418  0.8342819  0.08737420
  0.5    0.000100  0.1695925  0.8362622  0.08731825
  0.5    0.000500  0.1632201  0.8461364  0.08679384
  0.5    0.001000  0.1591060  0.8520758  0.08629427
  0.5    0.005000  0.1470707  0.8688008  0.08550668
  0.5    0.010000  0.1474686  0.8675998  0.08710968
  0.5    0.050000  0.1695629  0.8386063  0.10935270
  0.5    0.075000  0.1841805  0.8219876  0.12081526
  0.5    0.100000  0.1985376  0.8064791  0.13187596
  0.5    0.125000  0.2114672  0.7953057  0.14216486
  0.5    0.150000  0.2233297  0.7884573  0.15205946
  0.5    0.175000  0.2353497  0.7827136  0.16257297
  0.5    0.200000  0.2473769  0.7784926  0.17331738
  0.5    0.225000  0.2597029  0.7740997  0.18454370
  0.5    0.250000  0.2723377  0.7689306  0.19602060
  0.5    0.275000  0.2849918  0.7639630  0.20749237
  0.5    0.300000  0.2975115  0.7602207  0.21871114
  0.5    0.325000  0.3101928  0.7549867  0.23001788
  0.5    0.350000  0.3229323  0.7472781  0.24129766
  0.5    0.375000  0.3355561  0.7357114  0.25234518
  0.5    0.400000  0.3480588  0.7176339  0.26315506
  0.5    0.425000  0.3603446  0.6884627  0.27365906
  0.5    0.450000  0.3715257  0.6576301  0.28316301
  0.5    0.475000  0.3816336  0.6214163  0.29169979
  0.5    0.500000  0.3900225  0.6099986  0.29872411
  0.6    0.000005  0.1707480  0.8344097  0.08737420
  0.6    0.000010  0.1707480  0.8344097  0.08737420
  0.6    0.000050  0.1707511  0.8344121  0.08737744
  0.6    0.000100  0.1696843  0.8361159  0.08730022
  0.6    0.000500  0.1627459  0.8468054  0.08669446
  0.6    0.001000  0.1581294  0.8534760  0.08603425
  0.6    0.005000  0.1468823  0.8689147  0.08563244
  0.6    0.010000  0.1482474  0.8663156  0.08812014
  0.6    0.050000  0.1753692  0.8307643  0.11395467
  0.6    0.075000  0.1926331  0.8105966  0.12714755
  0.6    0.100000  0.2079225  0.7961092  0.13922013
  0.6    0.125000  0.2217196  0.7873250  0.15063365
  0.6    0.150000  0.2355530  0.7806855  0.16271013
  0.6    0.175000  0.2496284  0.7755672  0.17528103
  0.6    0.200000  0.2644768  0.7687477  0.18883049
  0.6    0.225000  0.2792485  0.7634871  0.20218177
  0.6    0.250000  0.2941244  0.7586748  0.21561758
  0.6    0.275000  0.3093384  0.7510517  0.22924629
  0.6    0.300000  0.3247103  0.7390044  0.24280429
  0.6    0.325000  0.3401681  0.7184073  0.25623974
  0.6    0.350000  0.3552638  0.6859485  0.26917181
  0.6    0.375000  0.3689691  0.6506777  0.28091486
  0.6    0.400000  0.3811593  0.6164726  0.29128245
  0.6    0.425000  0.3918545  0.5960832  0.30026817
  0.6    0.450000  0.4008493  0.5904565  0.30777879
  0.6    0.475000  0.4067417        NaN  0.31268761
  0.6    0.500000  0.4067417        NaN  0.31268761
  0.7    0.000005  0.1716968  0.8329255  0.08750992
  0.7    0.000010  0.1716968  0.8329255  0.08750992
  0.7    0.000050  0.1715011  0.8332347  0.08750053
  0.7    0.000100  0.1699035  0.8357798  0.08731974
  0.7    0.000500  0.1623243  0.8473529  0.08657680
  0.7    0.001000  0.1571805  0.8548542  0.08587782
  0.7    0.005000  0.1470432  0.8684973  0.08590511
  0.7    0.010000  0.1492584  0.8646990  0.08925266
  0.7    0.050000  0.1809722  0.8231889  0.11816131
  0.7    0.075000  0.2000806  0.8012626  0.13291098
  0.7    0.100000  0.2161021  0.7885254  0.14579813
 [ reached getOption("max.print") -- omitted 97 rows ]

RMSE was used to select the optimal model using the smallest value.
The final values used for the model were alpha = 0 and lambda = 0.175.
ggplot(enet_fit) + scale_x_log10()

Evaluate all models

For completeness, estimate the non-regularized linear model as well.

set.seed(857)
linear_fit <- train(
  log_sale_price ~ . -sale_price,
  data = data_train,
  method = "lm",
  preProcess = c("center", "scale"),
  trControl = fit_control
)
resample_profile <- resamples(
  list("linear" = linear_fit,
       "ridge" = ridge_fit,
       "lasso" = lasso_fit,
       "elastic net" = enet_fit
  )
) 

summary(resample_profile)

Call:
summary.resamples(object = resample_profile)

Models: linear, ridge, lasso, elastic net 
Number of resamples: 10 

MAE 
                  Min.    1st Qu.     Median       Mean    3rd Qu.      Max. NA's
linear      0.07777197 0.07940461 0.09265443 0.08873995 0.09527491 0.0987329    0
ridge       0.07367948 0.08026915 0.08802844 0.08830684 0.09413163 0.1061065    0
lasso       0.07086193 0.08087593 0.08594562 0.08711685 0.09203586 0.1047683    0
elastic net 0.07367948 0.08026915 0.08802844 0.08830684 0.09413163 0.1061065    0

RMSE 
                  Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
linear      0.11238141 0.1364270 0.1721632 0.1771764 0.2204885 0.2404686    0
ridge       0.09860469 0.1175353 0.1319301 0.1429234 0.1472833 0.2196785    0
lasso       0.09536545 0.1233744 0.1333620 0.1481077 0.1520182 0.2372004    0
elastic net 0.09860469 0.1175353 0.1319301 0.1429234 0.1472833 0.2196785    0

Rsquared 
                 Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
linear      0.7147800 0.7634893 0.8286702 0.8249801 0.8875117 0.9207508    0
ridge       0.7488259 0.8582377 0.8939727 0.8762089 0.9185868 0.9253607    0
lasso       0.7119918 0.8498486 0.8874229 0.8664453 0.9117261 0.9298672    0
elastic net 0.7488259 0.8582377 0.8939727 0.8762089 0.9185868 0.9253607    0
bwplot(resample_profile)

Are the differences between models large in a statistical sense? Not really. What we can certainly see is that the plain linear model's performance varies much more across folds than that of the penalized ones.

model_differences <- diff(resample_profile)
summary(model_differences)

Call:
summary.diff.resamples(object = model_differences)

p-value adjustment: bonferroni 
Upper diagonal: estimates of the difference
Lower diagonal: p-value for H0: difference = 0

MAE 
            linear ridge      lasso      elastic net
linear              0.0004331  0.0016231  0.0004331 
ridge       1                  0.0011900  0.0000000 
lasso       1      1                     -0.0011900 
elastic net 1      NA         1                     

RMSE 
            linear  ridge     lasso     elastic net
linear               0.034253  0.029069  0.034253  
ridge       0.09586           -0.005184  0.000000  
lasso       0.24831 0.49739              0.005184  
elastic net 0.09586 NA        0.49739              

Rsquared 
            linear ridge     lasso     elastic net
linear             -0.051229 -0.041465 -0.051229  
ridge       0.1170            0.009764  0.000000  
lasso       0.3528 0.3374              -0.009764  
elastic net 0.1170 NA        0.3374               
dotplot(model_differences)

Evaluate the chosen model on the holdout set

RMSE(predict(enet_fit, newdata = data_test), data_test[["log_sale_price"]])
[1] 0.1268464
---
title: "Lab week 1 - penalized linear models"
subtitle: "Data Science 2: Machine Learning Concepts - CEU 2021"
author: "Janos K. Divenyi, Jeno Pal"
date: '2021-02-08'
output:
  html_document:
    df_print: paged
  html_notebook:
    df_print: paged
---

```{r, message=FALSE, warning=FALSE}
library(tidyverse)
library(GGally)   # ggplot extensions
library(caret)
library(magrittr)
library(skimr)
library(janitor)  # for data cleaning purposes
library(glmnet)   # the main package for penalized linear models
library(broom)    # for tidying regression coefficient outputs
library(knitr)
library(kableExtra)  # for nicer tables in rmarkdown

theme_set(theme_bw())   # globally set ggplot theme
```

Disclaimer: this lab borrows lots of ideas and examples from [this](https://afit-r.github.io/regularized_regression) great tutorial.

## Data: Ames Housing dataset

We are going to predict house sale prices in Ames, Iowa. See [here](https://cran.r-project.org/web/packages/AmesHousing/index.html).
```{r}
library(AmesHousing)
housing_data <- make_ames()
housing_data <- clean_names(housing_data) # from janitor: standardizes variable names (separators and case)
housing_data <- housing_data %>% mutate(log_sale_price = log(sale_price))
skim(housing_data)
```

Look at the outcome variable, the price of houses.
```{r}
ggplot(data = housing_data, aes(x = sale_price)) + geom_density()
```
```{r}
ggplot(data = housing_data, aes(x = log_sale_price)) + geom_density()
```

Let's see correlations of features and the outcome variable. Based on this,
simple benchmark models can quickly be put together.
```{r, warning=FALSE, message=FALSE}
ggcorr(housing_data) # correlation plot, from GGally package
```

```{r, message=FALSE}
ggpairs(housing_data, columns = c("log_sale_price", "gr_liv_area", "tot_rms_abv_grd", "garage_area"))
```

### A baseline model

```{r}
lm(log_sale_price ~ gr_liv_area + tot_rms_abv_grd, data = housing_data) %>% 
  tidy() %>% 
  kable(digits = 3) %>% 
  kable_styling(full_width = F)
```

Weird pattern: the sale price decreases with the number of rooms? This is spurious: it is caused by the high positive correlation between the two features. Univariate regressions have the intuitive signs:

```{r}
lm(log_sale_price ~ gr_liv_area, data = housing_data) %>% 
  tidy() %>% 
  kable(digits = 3) %>% 
  kable_styling(full_width = F)
```

```{r}
lm(log_sale_price ~ tot_rms_abv_grd, data = housing_data) %>%
  tidy() %>% 
  kable(digits = 3) %>% 
  kable_styling(full_width = F)
```

Penalized methods offer a solution to these kinds of patterns.
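
As a quick preview of the penalized models introduced below (a sketch that is not part of the original lab; the `lambda` value is purely illustrative), a ridge penalty already resolves the sign flip on these two correlated features:

```{r}
# Sketch: ridge regression on just the two correlated features.
# glmnet standardizes internally; with a large enough penalty both
# coefficients come out positive, because heavy shrinkage pulls each
# coefficient towards the sign of its own correlation with the outcome.
x_two <- as.matrix(housing_data[, c("gr_liv_area", "tot_rms_abv_grd")])
coef(glmnet(x_two, housing_data[["log_sale_price"]], alpha = 0, lambda = 1))
```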

### Set up training and test (holdout) datasets

```{r}
set.seed(1234)
training_ratio <- 0.7
train_indices <- createDataPartition(
  y = housing_data[["log_sale_price"]],
  times = 1,
  p = training_ratio,
  list = FALSE
) %>% as.vector()
data_train <- housing_data[train_indices, ]
data_test <- housing_data[-train_indices, ]

fit_control <- trainControl(method = "cv", number = 10)
```

## Penalize large coefficients: the Ridge regression

The ridge regression adds a penalty term to the sum of squared residuals: the sum of squares
of the regression coefficients. This puts a cost on having large coefficients. Result: a biased but lower-variance model.

```{r}
features <- setdiff(names(housing_data), c("sale_price", "log_sale_price"))
```

First we are going to directly work with the `glmnet` package to estimate 
penalized models. Then we look at how this can be implemented through `caret`.

```{r}
# glmnet needs inputs as a matrix; model.matrix handles factor variables
# -1: drop the intercept column, as glmnet automatically includes its own
x_train <- model.matrix( ~ . -1, data_train[, features])
dim(x_train)

# standardization of variables is automatically done by glmnet

# how much penalty do we want to apply? select with CV
# (a grid like this could be passed to glmnet via `lambda = lambda_grid`;
#  left unsupplied, glmnet constructs its own lambda path)
lambda_grid <- 10^seq(2, -5, length.out = 100)

set.seed(1234)
ridge_model <- glmnet(
  x = x_train, y = data_train[["log_sale_price"]], 
  family = "gaussian", # for continuous response
  alpha = 0  # the ridge model
)

plot(ridge_model, xvar = "lambda") # one line per variable: the stronger the penalty, the more the coefficients shrink and bunch together near zero
```

Look at some individual coefficients.
```{r}
# helper function to extract the coefficient sequence as a tidy tibble
# (tidy() on sparse matrices is deprecated in broom, so convert explicitly)
get_glmnet_coeff_sequence <- function(glmnet_model) {
  coeff_sequence <- coef(glmnet_model) %>%
    as.matrix() %>%
    as_tibble(rownames = "variable") %>%
    pivot_longer(-variable, names_to = "lambda_id", values_to = "value")

  lambdas <- tibble(
    lambda = glmnet_model$lambda,
    lambda_id = paste0("s", 0:(length(glmnet_model$lambda) - 1))
  )

  dplyr::inner_join(coeff_sequence, lambdas, by = "lambda_id")
}
```

```{r}
ridge_coeffs <- get_glmnet_coeff_sequence(ridge_model)
```

```{r}
selected_variables <- c("gr_liv_area", "tot_rms_abv_grd", "garage_area",  "kitchen_abv_gr")
ggplot(
  data = ridge_coeffs %>% filter(variable %in% selected_variables),
  aes(x = log(lambda), y = value)) +
    geom_line() +
  facet_wrap(~ variable, scales = "free_y", ncol = 1)
```

We can use cross-validation to determine the optimal penalty term weight. Two lambda values are marked on the plot: one yields the minimal CV RMSE, the other belongs to the simplest model (highest lambda) whose CV error is within one standard error of the optimum. That is, the latter gives the simplest model that is still "good enough".

```{r}
set.seed(1234)
ridge_model_cv <- cv.glmnet(
  x = x_train, y = data_train[["log_sale_price"]], 
  family = "gaussian",
  alpha = 0,
  nfolds = 10
)

best_lambda <- ridge_model_cv$lambda.min
message(paste0("The optimally chosen penalty parameter: ", best_lambda))

highest_good_enough_lambda <- ridge_model_cv$lambda.1se
message(paste0("The highest good enough penalty parameter: ", highest_good_enough_lambda))
```
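
A small numerical check of the one-standard-error rule, using fields stored on the `cv.glmnet` object (a sketch):

```{r}
# lambda.1se is the largest lambda whose mean CV error (cvm) stays within
# one standard error (cvsd) of the minimum mean CV error
idx_min <- which.min(ridge_model_cv$cvm)
one_se_bound <- ridge_model_cv$cvm[idx_min] + ridge_model_cv$cvsd[idx_min]
ridge_model_cv$cvm[ridge_model_cv$lambda == ridge_model_cv$lambda.1se] <= one_se_bound
```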

```{r}
plot(ridge_model_cv)
```

We can also use `caret` to estimate ridge models. This lets us compare it later to any
other model estimated with caret, using, for example, cross-validation with exactly the
same folds.

```{r, message=FALSE, warning=FALSE}
# ridge model
ridge_tune_grid <- expand.grid(
  "alpha" = c(0),
  "lambda" = seq(0.05, 0.5, by = 0.025)
)

set.seed(857)
ridge_fit <- train(
  log_sale_price ~ . -sale_price,
  data = data_train,
  method = "glmnet",
  preProcess = c("center", "scale"),
  tuneGrid = ridge_tune_grid,
  trControl = fit_control
)
```

```{r}
ridge_fit
```

```{r}
ggplot(ridge_fit) 
```

## Another variant: LASSO regression

While Ridge applies a constraint on the sum of squares of coefficients, LASSO does the same for the sum of the __absolute values__ of coefficients.

This seemingly small difference has important consequences: some coefficients are set exactly to zero, others are only shrunk towards zero.
```{r}
set.seed(1234)
lasso_model <- glmnet(
  x = x_train, y = data_train[["log_sale_price"]], 
  family = "gaussian",
  alpha = 1  # the lasso model
)

plot(lasso_model, xvar = "lambda")
```
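
To see the selection happen along the whole path, a small sketch: the `df` field of a fitted glmnet object records the number of nonzero coefficients at each lambda.

```{r}
tibble(
  lambda = lasso_model$lambda,
  n_nonzero = lasso_model$df  # nonzero coefficients at each lambda
) %>%
  ggplot(aes(x = log(lambda), y = n_nonzero)) +
  geom_line() +
  labs(y = "number of nonzero coefficients")
```
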
Again, let's look at individual coefficients. We can see that some are set exactly to zero for higher values of the penalty term. This is in contrast to what we saw with the Ridge model.

```{r}
lasso_coeffs <- get_glmnet_coeff_sequence(lasso_model)
```

```{r}
selected_variables <- c("gr_liv_area", "tot_rms_abv_grd", "garage_area", "kitchen_abv_gr")
ggplot(
  data = lasso_coeffs %>% filter(variable %in% selected_variables),
  aes(x = log(lambda), y = value)) +
    geom_line() +
  facet_wrap(~ variable, scales = "free_y", ncol = 1)
```

Again, we can apply cross-validation to determine the optimal value for the penalty term.

```{r}
set.seed(1234)
lasso_model_cv <- cv.glmnet(
  x = x_train, y = data_train[["log_sale_price"]], 
  family = "gaussian",
  alpha = 1,
  nfolds = 10
)

best_lambda <- lasso_model_cv$lambda.min
message(paste0("The optimally chosen penalty parameter: ", best_lambda))

highest_good_enough_lambda <- lasso_model_cv$lambda.1se
message(paste0("The highest good enough penalty parameter: ", highest_good_enough_lambda))
```

```{r}
plot(lasso_model_cv)
```

Fitting LASSO models with `caret` is similar to fitting Ridge models.
```{r, warning=FALSE}
tenpowers <- 10^seq(-1, -5, by = -1)

lasso_tune_grid <- expand.grid(
  "alpha" = c(1),
  "lambda" = c(tenpowers, tenpowers / 2) 
)

set.seed(857)
lasso_fit <- train(
  log_sale_price ~ . -sale_price,
  data = data_train,
  method = "glmnet",
  preProcess = c("center", "scale"),
  tuneGrid = lasso_tune_grid,
  trControl = fit_control
)
```
```{r}
lasso_fit
```

```{r}
ggplot(lasso_fit) + scale_x_log10()
```

### Variable selection - why is it happening?

![Source: Introduction to Statistical Learning](images/lasso-ridge-alt-form.png)

The constraints are different: LASSO's constraint region has corners, while Ridge's is smooth. The corners make some coefficients exactly zero.

![Source: Introduction to Statistical Learning](images/islr_lasso_vs_ridge.png)
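
The pictures correspond to the constrained ("budget") form of the two problems: minimize the residual sum of squares subject to a budget $s$ on the coefficients,

$$
\sum_{j=1}^{p} |\beta_j| \le s \quad \text{(LASSO)}
\qquad \text{vs.} \qquad
\sum_{j=1}^{p} \beta_j^2 \le s \quad \text{(Ridge)}.
$$

In two dimensions the LASSO budget is a diamond with corners on the axes, while the ridge budget is a disk; the RSS contours often touch the diamond first at a corner, where one coefficient is exactly zero.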

## Combine Ridge and LASSO: Elastic net

We can combine the two types of penalties. LASSO is attractive because it performs principled variable selection; however, among correlated features it typically keeps only one, quite arbitrarily, in the model. Ridge, in contrast, shrinks the coefficients of correlated features towards zero together. Applying penalties on both the absolute values and the squares of the coefficients retains both virtues. This method is called the elastic net.

```{r, message = FALSE, warning=FALSE}
enet_tune_grid <- expand.grid(
  "alpha" = seq(0, 1, by = 0.1),
  "lambda" = union(lasso_tune_grid[["lambda"]], ridge_tune_grid[["lambda"]])
)

set.seed(857)
enet_fit <- train(
  log_sale_price ~ . -sale_price,
  data = data_train,
  method = "glmnet",
  preProcess = c("center", "scale"),
  tuneGrid = enet_tune_grid,
  trControl = fit_control
)
```

```{r}
enet_fit
```

```{r}
ggplot(enet_fit) + scale_x_log10()
```

## Evaluate all models

For completeness, estimate the non-regularized linear model as well.
```{r, message=FALSE, warning=FALSE}
set.seed(857)
linear_fit <- train(
  log_sale_price ~ . -sale_price,
  data = data_train,
  method = "lm",
  preProcess = c("center", "scale"),
  trControl = fit_control
)
```

```{r}
resample_profile <- resamples(
  list("linear" = linear_fit,
       "ridge" = ridge_fit,
       "lasso" = lasso_fit,
       "elastic net" = enet_fit
  )
) 

summary(resample_profile)
```
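
The comparison is fair because the folds coincide: we set the same seed (857) before each `train()` call, and caret keeps the resampling indices on the fitted objects, so this can be verified directly (a sketch):

```{r}
identical(ridge_fit$control$index, lasso_fit$control$index) &&
  identical(ridge_fit$control$index, enet_fit$control$index) &&
  identical(ridge_fit$control$index, linear_fit$control$index)
```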

```{r}
bwplot(resample_profile)
```

Are the differences between models large in a statistical sense? Not really. What we can certainly see is that the plain linear model's performance varies much more across folds than that of the penalized ones.

```{r}
model_differences <- diff(resample_profile)
```

```{r}
summary(model_differences)
```

```{r}
dotplot(model_differences)
```

### Evaluate the chosen model on the holdout set

```{r}
RMSE(predict(enet_fit, newdata = data_test), data_test[["log_sale_price"]])
```
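
The RMSE above is measured in log units, so it is roughly an average relative error (about 13% here). For an error measure in dollars, a simple sketch back-transforms the predictions; note that `exp()` of a log-scale prediction is a somewhat biased predictor of the level, which we ignore here.

```{r}
RMSE(exp(predict(enet_fit, newdata = data_test)), data_test[["sale_price"]])
```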

## Recommended readings

- [Tutorial of the Air Force Institute of Technology](https://afit-r.github.io/regularized_regression) - based on which this lab was put together
- [Elements of Statistical Learning](https://web.stanford.edu/~hastie/ElemStatLearn/) - the standard reference book of machine learning (freely available)
- [Introduction to Statistical Learning](http://www-bcf.usc.edu/~gareth/ISL/) - a more accessible treatment of the same material from the same authors (with R codes)
- [Applied Predictive Modelling](http://appliedpredictivemodeling.com/) - a thorough and practical guide to predictive modelling from the author of the `caret` package (with R code)
